R libraries.

library(data.table)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(stringr)
library(cowplot)

library(tidyr)
theme_set(theme_classic())

release_name = params$release_name
release_name
## [1] "2025_06_JAX_RNAseq08"

Read in results

result_dir = file.path("results", release_name)
enrichment_output_dir = file.path(result_dir, "enrichment")

res = readRDS(file.path(enrichment_output_dir, 
                        sprintf("%s_enrichment.rds", release_name)))
names(res)
##  [1] "PrS_day_6_hyp_ASCL2_PTC_up"          "PrS_day_6_hyp_ASCL2_PTC_down"       
##  [3] "PrS_day_6_hyp_ELF5_PTC_up"           "PrS_day_6_hyp_ELF5_PTC_down"        
##  [5] "PrS_day_6_hyp_HIF1A_PTC_up"          "PrS_day_6_hyp_HIF1A_PTC_down"       
##  [7] "PrS_day_6_nor_DLX3_PTC_up"           "PrS_day_6_nor_DLX3_PTC_down"        
##  [9] "PrS_day_6_nor_ELF5_PTC_up"           "PrS_day_6_nor_ELF5_PTC_down"        
## [11] "PrS_day_6_nor_MSX2_PTC_up"           "PrS_day_6_nor_MSX2_PTC_down"        
## [13] "PrS_day_6_nor_NR2F2_PTC_up"          "PrS_day_6_nor_NR2F2_PTC_down"       
## [15] "PrS_day_6_nor_VGLL1_PTC_up"          "PrS_day_6_ASCL2_PTC_hyp_vs_nor_up"  
## [17] "PrS_day_6_ASCL2_PTC_hyp_vs_nor_down" "PrS_day_6_ELF5_PTC_hyp_vs_nor_up"   
## [19] "PrS_day_6_ELF5_PTC_hyp_vs_nor_down"  "PrS_day_6_HIF1A_PTC_hyp_vs_nor_up"  
## [21] "PrS_day_6_HIF1A_PTC_hyp_vs_nor_down" "PrS_day_6_TP63_PTC_hyp_vs_nor_up"   
## [23] "PrS_day_6_TP63_PTC_hyp_vs_nor_down"  "PrS_day_6_WT_WT_hyp_vs_nor_up"      
## [25] "PrS_day_6_WT_WT_hyp_vs_nor_down"
multi_strategy_datasets = c("2023_12_JAX_RNAseq_ExtraEmbryonic",
                            "2024_05_JAX_RNAseq2_ExtraEmbryonic", 
                            "2024_06_JAX_RNAseq_Neuroectoderm", 
                            "2024_09_JAX_RNAseq6_Diverse", 
                            "2024_09_JAX_RNAseq7_Reversion")
  
if (release_name%in%multi_strategy_datasets){
  
  to_plot = grep("PTC|reverted|WT_WT_hyp_vs_nor", names(res))
  res = res[to_plot]

}

sapply(res, function(x){sapply(x,nrow)})
## $PrS_day_6_hyp_ASCL2_PTC_up
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_hyp_ASCL2_PTC_down
## reactome    go_bp 
##        5       10 
## 
## $PrS_day_6_hyp_ELF5_PTC_up
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_hyp_ELF5_PTC_down
## reactome    go_bp 
##        2       10 
## 
## $PrS_day_6_hyp_HIF1A_PTC_up
## go_bp 
##    10 
## 
## $PrS_day_6_hyp_HIF1A_PTC_down
## reactome    go_bp 
##        5       10 
## 
## $PrS_day_6_nor_DLX3_PTC_up
## reactome    go_bp 
##        4       10 
## 
## $PrS_day_6_nor_DLX3_PTC_down
## go_bp 
##     1 
## 
## $PrS_day_6_nor_ELF5_PTC_up
## reactome    go_bp 
##        2       10 
## 
## $PrS_day_6_nor_ELF5_PTC_down
## reactome 
##        1 
## 
## $PrS_day_6_nor_MSX2_PTC_up
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_nor_MSX2_PTC_down
## reactome    go_bp 
##        1       10 
## 
## $PrS_day_6_nor_NR2F2_PTC_up
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_nor_NR2F2_PTC_down
## reactome    go_bp 
##        7       10 
## 
## $PrS_day_6_nor_VGLL1_PTC_up
## go_bp 
##     1 
## 
## $PrS_day_6_ASCL2_PTC_hyp_vs_nor_up
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_ASCL2_PTC_hyp_vs_nor_down
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_ELF5_PTC_hyp_vs_nor_up
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_ELF5_PTC_hyp_vs_nor_down
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_HIF1A_PTC_hyp_vs_nor_up
## reactome    go_bp 
##        9       10 
## 
## $PrS_day_6_HIF1A_PTC_hyp_vs_nor_down
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_TP63_PTC_hyp_vs_nor_up
## reactome    go_bp 
##        3       10 
## 
## $PrS_day_6_TP63_PTC_hyp_vs_nor_down
## reactome    go_bp 
##       10       10 
## 
## $PrS_day_6_WT_WT_hyp_vs_nor_up
## go_bp 
##    10 
## 
## $PrS_day_6_WT_WT_hyp_vs_nor_down
## reactome    go_bp 
##       10       10
df_list = list()

for(label in names(res)){
  res1 = res[[label]]
  names(res1)[which(names(res1)=="go_bp")] = "GO biological process"
  type = names(res1)
  
  df_go = NULL
  
  for(gset1 in type){
    res_g1 = res1[[gset1]]
    res_g1$type = gset1
    df_go = rbind(df_go, res_g1)
  }
  
  df_list[[label]] = df_go
}

Prepare output directories for saving figure files out

df_size = NULL

figure_dir = file.path("figures", release_name)
goseq_figure_dir = file.path(figure_dir, "goseq_plots")

if (!file.exists(goseq_figure_dir)){
  dir.create(goseq_figure_dir, recursive = TRUE)
}

Generate plots

nchar_limit = 70

for(k in 1:length(df_list)){
  d1 = df_list[[k]]
  
  if(sum(d1$FDR < 0.05) == 0){ next }
  
  d1 = d1[d1$FDR < 0.05,]
  d1 = d1[order(d1$FDR, decreasing = TRUE),]
  
  d1_label = sapply(d1$category, function(x) {
    space_positions = gregexpr(" ", x)[[1]]
    if (nchar(x) > nchar_limit) {
      break_pos = max(space_positions[space_positions < nchar_limit])
      return(paste0(paste0(rep(" ", nchar_limit-break_pos), collapse=""),
                    substr(x, 1, break_pos), "\n",
                    substr(x, break_pos+1, nchar(x))))
    } else {
      # x = paste0(paste0(rep(" ", 1.1*(nchar_limit-nchar(x))), collapse=""), x)
      return(x)
    }
  })

  d1$y = 1:nrow(d1)
  nm1 = names(df_list)[k]
  nm1 = gsub("_", " ", nm1)
  nm1 = gsub("PTC ", "", nm1)
  nm1 = gsub("nor ", "Normoxia ", nm1)
  nm1 = gsub("hyp ", "Hypoxia ", nm1)
  nm1 = gsub("up", "up-regulated", nm1)
  nm1 = gsub("down", "down-regulated", nm1)
  
  if (grepl("Hypoxia vs Normoxia", nm1)){
    nm1_substrings = strsplit(nm1, " Hypoxia vs Normoxia ")[[1]]
    nm1 = paste0(nm1_substrings[1], 
                 "\nHypoxia vs Normoxia\n", 
                 nm1_substrings[2])
  }else{
    if (!grepl("EPAS1", nm1)){ # keep Normoxia for comparisons involving EPAS1 gene
      nm1 = gsub("Normoxia ", "", nm1)
    }
    nm1 = sub("reverted ", "", nm1) # only replace the first occurrence of "reverted "
    substrings = str_split(nm1, " ")[[1]]
    nm1 = paste0(paste(substrings[1:(length(substrings)-1)], collapse = " "), 
                 "\n", paste(substrings[length(substrings)], collapse = " "))
  }
  
  mycolors <- c("GO biological process" = "#F8766D", "reactome" = "#00BFC4")
  filtered_colors <- mycolors[unique(d1$type)]
  
  g1 = ggplot(d1, aes(x = -log10(FDR), y = y, color=type)) +
    geom_point(size = 2.5) + 
    labs(x = "-log10(FDR)", y="", title = nm1) + 
    scale_y_continuous(breaks = 1:nrow(d1), 
                       labels = d1_label, 
                       limits = c(0.8, nrow(d1)+0.2)) + 
    theme(legend.position = "top") + 
    scale_color_manual(values = filtered_colors) + 
    guides(color = guide_legend(title = NULL))

  h1 = 0.75*(nrow(d1)+0.4)/20 + 0.25
  if (nrow(d1)<=2){
    h1 = 0.75*(nrow(d1)+1)/20 + 0.25
  }
  
  canvas <- ggdraw() + draw_plot(g1, x = 0, y = 1-h1, width = 1, height = h1)    
  print(canvas)
  
  if (!grepl("Hypoxia vs Normoxia", nm1)){
    
    g2 = ggplot(d1, aes(x = -log10(FDR), y = y, color=type)) +
      geom_point(size = 2.5) + 
      labs(x = "-log10(FDR)", y="") + 
      scale_y_continuous(breaks = 1:nrow(d1), 
                         labels = d1_label, 
                         limits = c(0.8, nrow(d1)+0.2)) + 
      theme(legend.position = "top") + 
      scale_color_manual(values = filtered_colors) + 
      guides(color = guide_legend(title = NULL)) + 
      theme(
        panel.background = element_rect(fill = "transparent", color = NA),
        plot.background = element_rect(fill = "transparent", color = NA),
        legend.background = element_rect(fill = "transparent", color = NA)
        # axis.text.y = element_text(size = 10)
       )
    
    h1 = 0.75*(nrow(d1)+0.4)/20 + 0.25
    if (nrow(d1)<=2){
      h1 = 0.75*(nrow(d1)+1)/20 + 0.25
    }

    canvas2 <- ggdraw() + draw_plot(g2, x = 0, y = 0.5 - h1/2, width = 1, height = h1)
    
    saved_figure_name = gsub("\n", "_", nm1)
    saved_figure_name = paste0(gsub(" ", "_", saved_figure_name), ".svg")
  
    ggsave(file=file.path(goseq_figure_dir, saved_figure_name), 
           plot=canvas2, width=8, height=5, bg = "transparent", units="in")
    
    png_figure_name = gsub("\n", "_", nm1)
    png_figure_name = paste0(gsub(" ", "_", png_figure_name), ".png")
    
    ggsave(file=file.path(goseq_figure_dir, png_figure_name), 
           plot=canvas2, width=1920, height=1200, bg = "transparent", units="px")
  }
  
}

gc()
##           used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells 1970702 105.3    3620691 193.4         NA  3620691 193.4
## Vcells 4325953  33.1   10146329  77.5      32768  8388510  64.0
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.6.1
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] tidyr_1.3.1       cowplot_1.1.3     stringr_1.5.1     ggrepel_0.9.5     ggpubr_0.6.0     
## [6] ggplot2_3.5.1     data.table_1.16.0
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.9        utf8_1.2.4        generics_0.1.3    rstatix_0.7.2     stringi_1.8.4    
##  [6] digest_0.6.37     magrittr_2.0.3    evaluate_0.24.0   grid_4.4.1        pkgload_1.4.0    
## [11] fastmap_1.2.0     jsonlite_1.8.8    backports_1.5.0   purrr_1.0.2       fansi_1.0.6      
## [16] scales_1.3.0      textshaping_0.4.0 jquerylib_0.1.4   abind_1.4-5       cli_3.6.3        
## [21] rlang_1.1.4       munsell_0.5.1     withr_3.0.1       cachem_1.1.0      yaml_2.3.10      
## [26] tools_4.4.1       ggsignif_0.6.4    dplyr_1.1.4       colorspace_2.1-1  broom_1.0.6      
## [31] vctrs_0.6.5       R6_2.5.1          lifecycle_1.0.4   car_3.1-2         ragg_1.3.2       
## [36] pkgconfig_2.0.3   pillar_1.9.0      bslib_0.8.0       gtable_0.3.5      glue_1.7.0       
## [41] Rcpp_1.0.13       systemfonts_1.2.3 highr_0.11        xfun_0.47         tibble_3.2.1     
## [46] tidyselect_1.2.1  rstudioapi_0.16.0 knitr_1.48        farver_2.1.2      htmltools_0.5.8.1
## [51] svglite_2.2.1     rmarkdown_2.28    carData_3.0-5     labeling_0.4.3    compiler_4.4.1